# ------------------------------------------------------------------------------
# Examines role of physician experience in testing decisions
# Author: Cassidy Shubatt <cshubatt@gmail.com>
# To run: bash 04_physician_experience.sh
# ------------------------------------------------------------------------------

# Libraries --------------------------------------------------------------------
message("Loading libraries...")
library(here)
library(yaml)
library(tidyverse)
library(glue)
library(testit) # assert()
library(broom) # tidy()
library(data.table) #setnames()
library(lfe) # felm()
library(stargazer)

temp <- here("code", "07_other_physician_errors", "temp")
u <- modules::use(here("lib", "util.R"))

# Load Data --------------------------------------------------------------------
message("Loading data...")
overnight_lab <- ""
paths <- read_yaml(here::here("lib", "filepaths.yml"))
full_cohort <- readRDS(glue(paths$analysis$full_cohort))
test_cohort <- readRDS(glue(paths$analysis$test_cohort)) %>%
  filter(!exclude)

# Physician volume -------------------------------------------------------------
message("Getting physician volume...")
phys_volume <- full_cohort %>%
  group_by(enc_md_id) %>%
  summarize(physician_volume = n()) %>%
  ungroup

test_cohort <- test_cohort %>%
  u$safe_left_join(phys_volume) %>%
  filter(!is.na(physician_volume)) %>%
  mutate(
    physician_volume = (physician_volume - mean(physician_volume))/sd(physician_volume)
  ) %>%
  setnames(
    c("p__ensemble__stent_or_cabg_010_day__tested", "tile_stent_or_cabg_010_tested"),
    c("yhat", "yhat_decile")
  ) %>%
  mutate(yhat_decile = factor(yhat_decile) %>% relevel(ref = 10))

message("Num holdout observations w/non-null MD id: ", nrow(test_cohort))

# Volume regressions -----------------------------------------------------------
message("Regressing test decisions on risk and physician volume...")
phys_regs <- list()
outcome_vars <- c("test_010_day", "stent_or_cabg_010_day", "macetrop_030_pos")

for(outcome in outcome_vars){
  message("Running regression for ", outcome, "...")
  reg_df <- copy(test_cohort)
  if(outcome == "stent_or_cabg_010_day"){
    reg_df <- filter(reg_df, test_010_day)
  }else if(grepl("mace", outcome)){
    reg_df <- filter(reg_df, !test_010_day)
  }

  form <- reformulate(
    response = outcome,
    termlabels = c(
      "yhat*yhat_decile", "yhat*physician_volume", "1 | 0 | 0 | ptid"
    ),
  )

  fit <- felm(form, data = reg_df)
  phys_regs <- c(phys_regs, list(fit))
}

# Save stargazer ---------------------------------------------------------------
message("Writing regression table with stargazer...")
clean_tbl <- stargazer(
  phys_regs, omit.stat = c("f", "ser", "ll", "aic"), no.space = TRUE,
  dep.var.caption = ""
)

save_fp <- file.path(temp, "physician_volume_reg_tbl.tex")
write(clean_tbl, save_fp, append = FALSE)

message("Done.")
